# ------------------------------------------------------------------------------
# Replication Materials
# 
# title: Eliciting Beliefs as Distributions in Online Surveys
# journal: Political Analysis
# authors: Lucas Leemann, Richard Traunmüller, and Lukas Stoetzer
# date: August 2020
# ------------------------------------------------------------------------------


  library(tidyverse)
  library(xtable)
  options(xtable.comment = FALSE)
  source("fun/fun_electbeleifs_MLE.R")
  source("fun/fun_utilities.R")
  # library(elicitR)

#+ load, echo=FALSE, include=FALSE
# Load and Clean Data =====

  source("00_DataCleaner_symmetric.R") # Richard Code to clean Data
  dat_sym <- filter(data, screen1==1, screen2==1) # Filter with screen questions

  source("00_DataCleaner_asymmetric.R") # Richard Code to clean Data
  dat_asym <- filter(data, screen1==1, screen2==1) # Filter with screen questions

  source("00_DataCleaner_symmetric_variance.R") # Richard Code to clean Data
  dat_sym_var <- filter(data, screen1==1, screen2==1) # Filter with screen questions

  source("00_DataCleaner_asymmetric_variance.R") # Richard Code to clean Data
  dat_asym_var <- filter(data, screen1==1, screen2==1) # Filter with screen questions

  source("00_DataCleaner_nachtrag_sym.R") # Richard Code to clean Data
  dat_nach_sym <- filter(data, screen1==1, screen2==1) # Filter with screen questions

  source("00_DataCleaner_nachtrag_asym.R") # Richard Code to clean Data
  dat_nach_asym <- filter(data, screen1==1, screen2==1) # Filter with screen questions

# Estimation  ============

  res <- list() # Result List

#+ Quantile, echo=FALSE, include=FALSE
## Quantile Question  ============

  # Estimate Model for Quantile Question Symmetric, Low Variance
  res[["sym_lvar_quant"]] <- dat_sym %>%
    creat_dat_Y(method="Quantile") %>%
    est_eB_beta_i()

  # Estimate Model for Quantile Question Asymmetric, Low Variance
  res[["asym_lvar_quant"]]  <- dat_asym %>%
    creat_dat_Y(method="Quantile") %>%
    est_eB_beta_i()

  # Estimate Model for Quantile Question Symmetric, High Variance
  res[["sym_hvar_quant"]] <- dat_sym_var %>%
    creat_dat_Y(method="Quantile") %>%
    est_eB_beta_i()

  # Estimate Model for Quantile Question Asymmetric Variance, High Variance
  res[["asym_hvar_quant"]] <- dat_asym_var %>%
    creat_dat_Y(method="Quantile") %>%
    est_eB_beta_i()

#+ Quantile2, echo=FALSE, include=FALSE
## Quantile Question (without correction)  ============

  # # Estimate Model for Quantile Question Symmetric, Low Variance
  # res[["sym_lvar_qtnsc"]] <- dat_nach_sym %>%
  #   creat_dat_Y(method="QuantileNoCorrect") %>%
  #   est_eB_beta_i()
  # 
  # # Estimate Model for Quantile Question Asymmetric, Low Variance
  # res[["asym_lvar_qtnsc"]]  <- dat_nach_asym %>%
  #   creat_dat_Y(method="Quantile") %>%
  #   est_eB_beta_i()

## Intervall Question (Wide) ============

  # Estimate Model for Quantile Question Symmetric, low variance
  res[["sym_lvar_intwi"]]  <- dat_sym %>%
    creat_dat_Y("Interval") %>%
    est_eB_beta_int_i(cut.offs = c(0.40,0.60))

  # Estimate Model for Quantile Question Symmetric
  res[["asym_lvar_intwi"]]  <- dat_asym %>%
    creat_dat_Y("Interval") %>%
    est_eB_beta_int_i(cut.offs = c(0.40,0.60))

  # Estimate Model for Quantile Question Symmetric, high variance
  res[["sym_hvar_intwi"]]  <- dat_sym_var %>%
    creat_dat_Y("Interval") %>%
    est_eB_beta_int_i(cut.offs = c(0.40,0.60))

  # Estimate Model for Quantile Question Symmetric, high variance
  res[["asym_hvar_intwi"]]  <- dat_asym_var %>%
    creat_dat_Y("Interval") %>%
    est_eB_beta_int_i(cut.offs = c(0.40,0.60))

#+ Intervall2, echo=FALSE, include=FALSE
## Intervall Question (Narrow) ============

  # Estimate Model for Quantile Question Symmetric, low variance
  res[["sym_lvar_intna"]]  <- dat_sym %>%
    creat_dat_Y("Interval2") %>%
    est_eB_beta_int_i(cut.offs = c(0.45,0.55))

  # Estimate Model for Quantile Question Symmetric
  res[["asym_lvar_intna"]]  <- dat_asym %>%
    creat_dat_Y("Interval2") %>%
    est_eB_beta_int_i(cut.offs = c(0.45,0.55))

  # Estimate Model for Quantile Question Symmetric, high variance
  res[["sym_hvar_intna"]]  <- dat_sym_var %>%
    creat_dat_Y("Interval2") %>%
    est_eB_beta_int_i(cut.offs = c(0.45,0.55))

  # Estimate Model for Quantile Question Symmetric, high variance
  res[["asym_hvar_intna"]]  <- dat_asym_var %>%
    creat_dat_Y("Interval2") %>%
    est_eB_beta_int_i(cut.offs = c(0.45,0.55))

#+ Manski, echo=FALSE, include=TRUE
## Manski Question  ============

  # Estimate Model for Quantile Question Symmetric, low Variance
  res[["sym_lvar_manski"]]  <- dat_sym %>%
    creat_dat_Y("Manski") %>%
    est_eB_beta_manski_i()
    
  # Estimate Model for Quantile Question Asymmetric, low Variance
  res[["asym_lvar_manski"]]  <- dat_asym %>%
    creat_dat_Y("Manski") %>%
    est_eB_beta_manski_i()

  # Estimate Model for Quantile Question Symmetric, low Variance
  res[["sym_hvar_manski"]]  <- dat_sym_var %>%
    creat_dat_Y("Manski") %>%
    est_eB_beta_manski_i()

  # Estimate Model for Quantile Question Asymmetric, low Variance
  res[["asym_hvar_manski"]]  <- dat_asym_var %>%
    creat_dat_Y("Manski") %>%
    est_eB_beta_manski_i()

#+ BinsBalls, echo=FALSE, include=FALSE
## Bins and Balls Question  ============

  # Estimate Model for Quantile Question Symmetric, low Variance
  res[["sym_lvar_binba"]]  <- dat_sym %>%
    creat_dat_Y("BinsBalls") %>%
    est_eB_beta_binsballs_i()


  # Estimate Model for Quantile Question Symmetric, low Variance
  res[["asym_lvar_binba"]]  <- dat_asym %>%
    creat_dat_Y("BinsBalls") %>%
    est_eB_beta_binsballs_i()

  # Estimate Model for Quantile Question Symmetric, high Variance
  res[["sym_hvar_binba"]]  <- dat_sym_var %>%
    creat_dat_Y("BinsBalls") %>%
    est_eB_beta_binsballs_i()

  # Estimate Model for Quantile Question Symmetric, high Variance
  res[["asym_hvar_binba"]]  <- dat_asym_var %>%
    creat_dat_Y("BinsBalls") %>%
    est_eB_beta_binsballs_i()

#+ Figure, echo=FALSE, include=TRUE, fig.cap = "Comparsion of average elecited beliefs and true data distribution"
## Figure Average Estimate  ============

  # Get estimates
  par_est <- as.data.frame(do.call("rbind",sapply(1:length(res), 
                                               function(i) cbind(1:nrow(res[[i]]$shapes),as.matrix(res[[i]]$shapes),names(res)[i]) )))
  names(par_est) <- c("ind","alpha","beta","type")
  
  # Name estimates  & create Factors
  par_est <- par_est %>%
    mutate(alpha = as.numeric(as.character(alpha)),
           beta = as.numeric(as.character(beta))) %>%
    na.omit() %>%
    separate("type",c("type","var","method")) %>%
    mutate(type = factor(type, levels = c("sym","asym"), labels = c("Symmetric","Asymetric")),
           var = factor(var, levels = c("lvar","hvar"),labels = c("Small Variance","Large Variance")),
           method = factor(method, labels = c("Bins and Balls","Interval (Wide)",
                                              "Interval (Narrow)", "Manski", "Quantile")))

  # Expand Data.frame
  expand.grid.df <- function(...) Reduce(function(...) merge(..., by=NULL), list(...))
  df_est <- expand.grid.df(data.frame("x"=seq(0.2,0.8,0.01)), (par_est))

  # Prepare Data for Plot
  df_est       <- mutate(df_est, db = dbeta(x,alpha,beta))
# 
  
  par_true <- data.frame(expand.grid("type"=c("Symmetric","Asymetric"),
                                    "var"=c("Large Variance","Small Variance")) %>%
                          mutate(alpha_true = c(30,30,60,60),
                                 beta_true = c(30,15,60,30)))
  
  df_true <- expand.grid.df(data.frame("x"=seq(0.2,0.8,0.01)), (par_true)) %>%
                mutate(db = dbeta(x,alpha_true,beta_true))
  
  
# 
#   # Doesn't work here.
#   df_est <- left_join(df_est,df_true, by=c("type","var")) %>%
#      mutate(db_true = dbeta(x,alpha_true,beta_true)) %>%
#       select(-alpha_true, -beta_true) %>%
#       gather("db","db_true",key=Scenario, value=val) %>%
#       mutate(Scenario = factor(ifelse(Scenario=="db","Elicited Beliefs","Data Distribution")),
#              Scenario = relevel(Scenario, "Elicited Beliefs"))

  # Plot Data
  ggplot() +
    geom_line(data=df_est,aes(x=x,y=db, group=ind),alpha=0.05) +
    geom_line(data=df_true,aes(x=x,y=db),col="red") +
    facet_grid(method ~ type+var, scales="free") +
    scale_color_grey() +
    theme_minimal() +
    xlab("Vote Share") + ylab("Density") +
    theme(axis.title.y=element_blank(),
          text = element_text(size=12),
          strip.text.y = element_text(angle = 360),
          legend.position = "bottom",
          axis.text.y=element_blank(),
          axis.ticks.y=element_blank(),
          panel.background = element_rect(fill = "grey96",
                             colour = "grey96",
                             size = 0, linetype = "solid"),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()
    )

  ggsave("out/fig/Fig_A3.pdf",width = 12,height = 12)

#+ table, echo=FALSE, include=TRUE, results="asis"
## KL Deviations for respondent  ============

  # Calculate average KL over different Data types
  df_KL <- left_join(par_est,par_true) %>%
            rowwise() %>%
            mutate(KL = KL_dis_beta(c(alpha_true,beta_true), c(alpha,beta)))
 
  df_KL_sum <- df_KL %>%
    group_by(method,type, var) %>%
    summarise(median = median(KL),
              qlow = quantile(KL, 0.25),
              qhigh = quantile(KL, 0.75)
              )
  
  ls_est <- split(df_KL_sum, list(df_KL_sum$type, df_KL_sum$var))
  nam_l <- c("A7_c","A7_d","A7_a","A7_b")
  for(nam in names(ls_est)){
    scena <- which(nam == names(ls_est))
    dat_tab <- ls_est[[nam]] %>% arrange(median) %>%
      select(c("method","median","qlow","qhigh"))
    types <- strsplit(nam,"[.]")[[1]]
    text <- paste("Estimates for", types[1],"Distribution with",types[2],sep=" ")
    print(xtable(dat_tab, caption=text), floating = FALSE,booktabs = T, include.rownames=FALSE,
          #file=paste("out/fig/Tab_ElecitedBeliefs_",str_replace(nam," ",""),"_MLE_Resp.tex",sep=""))
          file=paste("out/fig/Tab_",str_replace(nam_l[scena]," ",""),".tex",sep=""))
  }
  
  # Calcualte Kl fover all methods
  df_tab_overall <- df_KL %>%
    group_by(method) %>%
    summarise(median = median(KL),
              qlow = quantile(KL, 0.25),
              qhigh = quantile(KL, 0.75)
    ) %>% arrange(median)
  
 print(xtable(df_tab_overall, caption="KL Divergence overall secenarios"), floating = FALSE,booktabs = T, include.rownames=FALSE,
        file=paste("out/fig/Tab_A8.tex",sep=""))
  